# Replication files for:
# "Generalizability of Heterogeneous Treatment Effect Estimates Across Samples"
# Alexander Coppock, Thomas J. Leeper, and Kevin J. Mullinix
# Proceedings of the National Academy of Sciences, Forthcoming

rm(list = ls())
library(dplyr)
library(tidyr)
library(readr)
library(reshape2)
library(estimatr)

# Superordinate IDs -------------------------------------------------------

superordinate_id_stacked <- read_rds("superordinate_id_stacked.rds")

# notes: no nonwhites, missing educ_3, female, ideo_3

superordinate_id <-
  superordinate_id_stacked %>%
  gather(col, value, age_3, pid_3, white) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z + Z_particularism, data = .)))

# Patriot Act -------------------------------------------------------------

patriot_act_stacked <- read_rds("patriot_act_stacked.rds")

# Notes: missing ideo_3

patriot_act <-
  patriot_act_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .)))

# Elite Endorsements ------------------------------------------------------

elite_endorsements_stacked <- read_rds("elite_endorsements_stacked.rds")

# Notes: must control for (or condition on) pid because of dif. probs. of assignment.
# Notes: the "match" treatments only defined for partisans
# Notes: missing ideo_3

elite_endorsements_results_1 <-
  elite_endorsements_stacked %>%
  filter(Z_imm_match != "control") %>%
  gather(col, value, age_3, white, educ_3, female) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z + pid_3, weights = weights, data = .))) 

elite_endorsements_results_2 <-
  elite_endorsements_stacked %>%
  filter(Z_imm_match != "control") %>%
  gather(col, value, pid_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .)))

elite_endorsements <-
  bind_rows(
    elite_endorsements_results_1,
    elite_endorsements_results_2
  )

# Free Trade --------------------------------------------------------------

free_trade_stacked <- read_rds("free_trade_stacked.rds")

free_trade <-
  free_trade_stacked %>%
  filter(sample != "gfk") %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z + Z_Hiscox_valence, weights = weights, data = .)))


# Frame Breadth -----------------------------------------------------------

frame_breadth_stacked <- read_rds("frame_breadth_stacked.rds")

frame_breadth <-
  frame_breadth_stacked %>%
  filter(sample != "gfk") %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .)))


# Polarization ------------------------------------------------------------

polarization_stacked <- read_rds("polarization_stacked.rds")

polarization <-
  polarization_stacked %>%
  filter(Z_Levendusky != "placebo") %>%
  filter(sample != "gfk") %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .)))

# Immigration -------------------------------------------------------------

immigration_stacked <- read_rds("immigration_stacked.rds")

# Notes: only whites in original
# Notes: missing ideo_3

immigration <-
  immigration_stacked %>%
  filter(Z_brader_pos_neg != "control") %>%
  gather(col, value, age_3, pid_3, educ_3, female, white) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .))) 

# System Threat -----------------------------------------------------------

system_threat_stacked <- read_rds("system_threat_stacked.rds")

system_threat <-
  system_threat_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .)))

# Expert Economists -------------------------------------------------------

expert_economists_stacked_long <- read_rds("expert_economists_stacked_long.rds")

# Notes: original randomly asked only one Q; replication asked all five. To make the omnibus comparable, cluster!

expert_economists <-
  expert_economists_stacked_long %>%
  filter(!is.na(Z), !is.na(Y_s)) %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(
    lm_robust(Y_s ~ Z, weights = weights, data = .),
    clust_var = .$anon_ID
  ))

# Mental Illness ----------------------------------------------------------

mental_illness_stacked <- read_rds("mental_illness_stacked.rds")

mental_illness <-
  mental_illness_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z + Z_mcginty_policy, weights = weights, data = .))) 

# Death Penalty -----------------------------------------------------------

death_penalty_stacked <- read_rds("death_penalty_stacked.rds")

death_penalty <-
  death_penalty_stacked %>%
  filter(Z_CP_3 != "control") %>%
  gather(col, value, age_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .))) 

# berganS20 ---------------------------------------------------------------

berganS20_stacked <- read_rds("berganS20_stacked.rds")

berganS20 <-
  berganS20_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .))) 

# brandtS1 ----------------------------------------------------------------

brandtS1_stacked <- read_rds("brandtS1_stacked.rds")

# Notes: Missing ideo_3

brandtS1 <-
  brandtS1_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .)))

# caprarielloS2 -----------------------------------------------------------

caprarielloS2_stacked <- read_rds("caprarielloS2_stacked.rds")

caprarielloS2 <-
  caprarielloS2_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .))) 

# converseS16 -------------------------------------------------------------

converseS16_stacked <- read_rds("converseS16_stacked.rds")

# notes: missing pid_3 and ideo_3

converseS16 <-
  converseS16_stacked %>%
  gather(col, value, age_3, white, educ_3, female) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .))) 

# dennyS17 ----------------------------------------------------------------

dennyS17_stacked <- read_rds("dennyS17_stacked.rds")

dennyS17 <-
  dennyS17_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .))) 

# flavinS4 ----------------------------------------------------------------

flavinS4_stacked <- read_rds("flavinS4_stacked.rds")

flavinS4 <-
  flavinS4_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .))) 

# gashS5 ------------------------------------------------------------------

gashS5_stacked <- read_rds("gashS5_stacked.rds")

gashS5 <-
  gashS5_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .))) 

# jacobsenS7 --------------------------------------------------------------

jacobsenS7_stacked <- read_rds("jacobsenS7_stacked.rds")

jacobsenS7 <-
  jacobsenS7_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .))) 

# melloS6 -----------------------------------------------------------------

melloS6_stacked <- read_rds("melloS6_stacked.rds")

# Notes: missing pid_3 and ideo_3

melloS6 <-
  melloS6_stacked %>%
  gather(col, value, age_3, white, educ_3, female) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .)))

# parmerS15 ---------------------------------------------------------------

parmerS15_stacked <- read_rds("parmerS15_stacked.rds")

parmerS15 <-
  parmerS15_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .))) 

# pedullaS18 --------------------------------------------------------------
pedullaS18_stacked <- read_rds("pedullaS18_stacked.rds")

# Notes: removed age_3_More than 60 due to no variation in Y

pedullaS18 <-
  pedullaS18_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .))) %>%
  filter(group != "age_3_More than 60") # no variation in Y

# piazzaS8 ----------------------------------------------------------------
piazzaS8_stacked <- read_rds("piazzaS8_stacked.rds")

piazzaS8 <-
  piazzaS8_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .))) 

# shaferS9 ----------------------------------------------------------------
shaferS9_stacked <- read_rds("shaferS9_stacked.rds")

shaferS9 <-
  shaferS9_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .))) 

# thompsonS10 -------------------------------------------------------------
thompsonS10_stacked <- read_rds("thompsonS10_stacked.rds")

thompsonS10 <-
  thompsonS10_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .))) 

# turagaS11 ---------------------------------------------------------------
turagaS11_stacked <- read_rds("turagaS11_stacked.rds")

turagaS11 <-
  turagaS11_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .))) 

# wallaceS12 --------------------------------------------------------------

wallaceS12_stacked <- read_rds("wallaceS12_stacked.rds")

wallaceS12 <-
  wallaceS12_stacked %>%
  gather(col, value, age_3, pid_3, white, educ_3, female, ideo_3) %>%
  mutate(group = paste0(col, "_", value)) %>%
  group_by(group, sample) %>%
  do(tidy(lm_robust(Y_s ~ Z, weights = weights, data = .)))

# Results -----------------------------------------------------------------

results <-
  bind_rows(
    superordinate_id = superordinate_id,
    patriot_act = patriot_act,
    elite_endorsements = elite_endorsements,
    free_trade = free_trade,
    frame_breadth = frame_breadth,
    polarization = polarization,
    immigration = immigration,
    system_threat = system_threat,
    expert_economists = expert_economists,
    mental_illness = mental_illness,
    death_penalty = death_penalty,
    berganS20 = berganS20,
    brandtS1 = brandtS1,
    caprarielloS2 = caprarielloS2,
    converseS16 = converseS16,
    dennyS17 = dennyS17,
    flavinS4 = flavinS4,
    gashS5 = gashS5,
    jacobsenS7 = jacobsenS7,
    melloS6 = melloS6,
    parmerS15 = parmerS15,
    pedullaS18 = pedullaS18,
    piazzaS8 = piazzaS8,
    shaferS9 = shaferS9,
    thompsonS10 = thompsonS10,
    turagaS11 = turagaS11,
    wallaceS12 = wallaceS12,
    .id = "study"
  )

results$covariate <-
  stringr::str_split(results$group,
                     pattern = "_",
                     n = 3,
                     simplify = TRUE)[, 1]

study_labels_df <- read_rds("CLM_study_labels_df.rds")

results <-
  results %>%
  left_join(study_labels_df) %>%
  mutate(sig = as.numeric(p.value <= 0.05))

results <- 
  within(results,{
    sample[sample == "gfk"] <- "original"
  })

write_rds(results, path = "CLM_results.rds")

# Reshape -----------------------------------------------------------------

scatter_df <-
  results %>%
  filter(term == "Z") %>%
  melt %>%
  dcast(study + study_label + covariate + group ~ variable + sample) %>%
  mutate(
    sig_mt = factor(
      sig_mt,
      levels = c(0, 1),
      labels = c("Not Significant", "Significant")
    ),
    sig_original = factor(
      sig_original,
      levels = c(0, 1),
      labels = c("Not Significant", "Significant")
    )
  )


scatter_df <-
  scatter_df %>%
  mutate(
    group_label = factor(group,
                         levels = c("age_3_18 - 39", "age_3_40 - 59", "age_3_More than 60",
                                    "educ_3_Less than College", "educ_3_College", "educ_3_Graduate School",
                                    "female_0", "female_1", "female_place_holder",
                                    "ideo_3_Liberal", "ideo_3_Moderate", "ideo_3_Conservative",
                                    "pid_3_Democrat", "pid_3_Independent", "pid_3_Republican",
                                    "white_0", "white_1", "white_place_holder"),
                         labels = c("Age: 18 - 39", "Age: 40 - 59", "Age: More than 60",
                                    "Less than College", "College", "Graduate School",
                                    "Men", "Women", "",
                                    "Liberal", "Moderate", "Conservative",
                                    "Democrat", "Independent", "Republican",
                                    "Nonwhite", "White", " ")
    )
  )

scatter_df <-
  scatter_df %>%
  mutate(est_diff_in_cates = estimate_mt - estimate_original,
         std.error_diff_in_cates = sqrt(std.error_mt^2 + std.error_original^2),
         ui_diff_in_cates = est_diff_in_cates + 1.96*std.error_diff_in_cates,
         li_diff_in_cates = est_diff_in_cates - 1.96*std.error_diff_in_cates,
         p_diff_in_cates = 2*pnorm(abs(est_diff_in_cates/std.error_diff_in_cates), lower.tail = FALSE),
         `Difference in CATES` = factor((p_diff_in_cates < 0.05), levels = c(T, F), labels = c("Significant", "Not Significant"))
  ) %>%
  filter(!is.na(`Difference in CATES`))


write_rds(scatter_df, path = "CLM_scatter_df.rds")
